#---- Replication script 3/4----

#Turnbull-Dugarte
#"Liberal in the sheets [...]"


###This script produces all analysis from Study III

remove(list=ls())
library(pacman)


p_load(here, readxl, rio, ggpubr, dplyr, jtools, estimatr, stargazer, margins,
       modelsummary, Hmisc, scales, stringr, haven, readr, viridis, ggdist, patchwork,
       dplyr, tidyr, kableExtra, RColorBrewer, grf, interactions, starbility, plyr)


##Loading data##
df_study3 <- read_dta("data/study3_data.dta")
df_study3 <-as.data.frame(df_study3)

rescale <- function(x) {
  (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}
vars_to_rescale <- c("reject_bimen", "reject_biwomen", "profile_attractive", "profile_feminine", "profile_masculine", "profile_trustworthy", "profile_faithful", "profile_adventurous", "profile_recommend", "profile_woulddate", "profile_gencongruent")

for (var in vars_to_rescale) {
  df_study3[[var]] <- rescale(df_study3[[var]])
}

df_study3$genderedpenalty <- (df_study3$reject_bimen - df_study3$reject_biwomen)
table(df_study3$genderedpenalty)


df_study3$treat_profile <- factor(df_study3$treat_profile)
df_study3$treat_profile <- relevel(df_study3$treat_profile, ref="Straight")

df3_het <- df_study3 %>% 
  filter(sexualorientation=="heterosexual")

df3_men <- df3_het  %>% 
  filter(gender==0)
df3_women <- df3_het  %>% 
  filter(gender==1)


###FIGURE 10### 
mod_full<- lm_robust(profile_recommend ~ treat_profile, data = df3_het)
tidy_full <- tidy(mod_full)
tidy_full$sample <- "Full sample"
tidy_full$outcome <- "Peers"

mod_full2<- lm_robust(profile_woulddate ~ treat_profile, data = df3_het)
tidy_full2 <- tidy(mod_full2)
tidy_full2$sample <- "Full sample"
tidy_full2$outcome <- "Self"

mod_men<- lm_robust(profile_recommend ~ treat_profile, data = df3_men)
tidy_men <- tidy(mod_men)
tidy_men$sample <- "Men"
tidy_men$outcome <- "Peers"

mod_men2<- lm_robust(profile_woulddate ~ treat_profile, data = df3_men)
tidy_men2 <- tidy(mod_men2)
tidy_men2$sample <- "Men"
tidy_men2$outcome <- "Self"

mod_women<- lm_robust(profile_recommend ~ treat_profile, data = df3_women)
tidy_women <- tidy(mod_women)
tidy_women$sample <- "Women"
tidy_women$outcome <- "Peers"

mod_women2<- lm_robust(profile_woulddate ~ treat_profile, data = df3_women)
tidy_women2 <- tidy(mod_women2)
tidy_women2$sample <- "Women"
tidy_women2$outcome <- "Self"


models<- rbind(tidy_full, tidy_men, tidy_women, tidy_full2, tidy_men2, tidy_women2)
models<-subset(models, term %in% c("treat_profileBi")) 

models$lower90 <- models$estimate - (1.645 * models$std.error)
models$higher90 <- models$estimate + (1.645 * models$std.error)
models$lower95 <- models$estimate - (1.96 * models$std.error)
models$higher95 <- models$estimate + (1.96 * models$std.error)
models$lower99 <- models$estimate - (2.58 * models$std.error)
models$higher99 <- models$estimate + (2.58 * models$std.error)

col2<- c("steelblue", "#B12A90FF")




mainplot<-ggplot(models, aes(y = sample, x=estimate, color=outcome)) + 
  geom_vline(xintercept = 0, linetype="longdash", colour="black", alpha=.5)+
  scale_color_manual(values = col2, guide = "none")+
  xlim(-.22,0)+
  geom_segment(data = subset(models, outcome == "Peers"),
               aes(x = lower90, xend = higher90, y = sample, yend = sample),
               size = 4, alpha = 1,  position = position_nudge(y = .1)) + 
  geom_segment(data = subset(models, outcome == "Peers"),
               aes(x = lower95, xend = higher95, y = sample, yend = sample),
               size = 4, alpha = .6,  position = position_nudge(y = .1)) + 
  geom_segment(data = subset(models, outcome == "Peers"),
               aes(x = lower99, xend = higher99, y = sample, yend = sample),
               size = 4, alpha = .3, position = position_nudge(y = .1)) + 
  geom_point(data = subset(models, outcome == "Peers"),
             aes(x = estimate, y = sample), fill = "white", color = "grey67", size = 2.5, pch = 21,  
             position = position_nudge(y = .1))+
  geom_segment(data = subset(models, outcome == "Self"),
               aes(x = lower90, xend = higher90, y = sample, yend = sample),
               size = 4, alpha = 1,  position = position_nudge(y = -.1)) + 
  geom_segment(data = subset(models, outcome == "Self"),
               aes(x = lower95, xend = higher95, y = sample, yend = sample),
               size = 4, alpha = .6,  position = position_nudge(y = -.1)) + 
  geom_segment(data = subset(models, outcome == "Self"),
               aes(x = lower99, xend = higher99, y = sample, yend = sample),
               size = 4, alpha = .3, position = position_nudge(y = -.1)) + 
  geom_point(data = subset(models, outcome == "Self"),
             aes(x = estimate, y = sample), fill = "white", color = "grey67", size = 2.5, pch = 21,  
             position = position_nudge(y = -.1))+
  geom_label(data = subset(models, outcome == "Self"),aes(label=sprintf("%.2f", 
                                                                        round(estimate, digits = 2))), nudge_y = -.3, size=2.5)+
  geom_label(data = subset(models, outcome == "Peers"),aes(label=sprintf("%.2f", 
                                                                         round(estimate, digits = 2))), nudge_y = .3, size=2.5)+
  theme_ggdist()+
  annotate("label", x = -.22, y = 3.45, label = "Would enourage a friend to date",
           color="steelblue", size=3, fontface="bold", hjust=0)+
  annotate("label", x = -.22, y = 3.25, label = "I would date",
           color="#B12A90FF", size=3, fontface="bold", hjust=0)+
  labs(x="",
       subtitle="Coefficients: ATE on propensity to date")+
  theme(legend.position = "none",
        axis.text.x=element_text(color="black"),
        axis.title.y=element_blank(),
        plot.subtitle = element_text(face="bold"),
        axis.text.y = element_text(color="black", face="bold"))+
  annotate("segment", x = -.2, xend = -.2, y = 1.9, yend = 2.9, color = "grey", linetype = "solid", alpha=.7)+
  annotate("segment", x = -.192, xend = -.2, y = 2.9, yend = 2.9, color = "grey", linetype = "solid", alpha=.7)+
  annotate("segment", x = -.125, xend = -.2, y = 1.9, yend = 1.9, color = "grey", linetype = "solid", alpha=.7)+
  annotate("label", x = -.2, y = 2.4, label = "-0.07 (p<0.05)", size = 2, fontface="bold",color = "black")


mod_diff_full<- lm_robust(profile_recommend ~ treat_profile*gender, data = df3_het)
tidy_diff_full <- tidy(mod_diff_full)
tidy_diff_full$sample <- "Full sample"
tidy_diff_full$outcome <- "Peers"

mod_diff_full2<- lm_robust(profile_woulddate ~ treat_profile*gender, data = df3_het)
tidy_diff_full2 <- tidy(mod_diff_full2)
tidy_diff_full2$sample <- "Full sample"
tidy_diff_full2$outcome <- "Self"

models_diff<- rbind(tidy_diff_full, tidy_diff_full2)
models_diff<-subset(models_diff, term %in% c("treat_profileBi:gender")) 
models_diff <- models_diff %>%
  mutate(sample= ifelse(sample == "Full sample", "Gender diff.\nin ATE", sample))

models_diff$lower90 <- models_diff$estimate - (1.645 * models_diff$std.error)
models_diff$higher90 <- models_diff$estimate + (1.645 * models_diff$std.error)
models_diff$lower95 <- models_diff$estimate - (1.96 * models_diff$std.error)
models_diff$higher95 <- models_diff$estimate + (1.96 * models_diff$std.error)
models_diff$lower99 <- models_diff$estimate - (2.58 * models_diff$std.error)
models_diff$higher99 <- models_diff$estimate + (2.58 * models_diff$std.error)

col2<- c("steelblue", "#B12A90FF")

diffplot<- ggplot(models_diff, aes(y = sample, x=estimate, color=outcome)) + 
  geom_vline(xintercept = 0, linetype="longdash", colour="black", alpha=.5)+
  scale_color_manual(values = col2, guide = "none")+
  xlim(-.2,.05)+
  geom_segment(data = subset(models_diff, outcome == "Peers"),
               aes(x = lower90, xend = higher90, y = sample, yend = sample),
               size = 4, alpha = 1,  position = position_nudge(y = .2)) + 
  geom_segment(data = subset(models_diff, outcome == "Peers"),
               aes(x = lower95, xend = higher95, y = sample, yend = sample),
               size = 4, alpha = .6,  position = position_nudge(y = .2)) + 
  geom_segment(data = subset(models_diff, outcome == "Peers"),
               aes(x = lower99, xend = higher99, y = sample, yend = sample),
               size = 4, alpha = .3, position = position_nudge(y = .2)) + 
  geom_point(data = subset(models_diff, outcome == "Peers"),
             aes(x = estimate, y = sample), fill = "white", color = "grey67", size = 2.5, pch = 21,  
             position = position_nudge(y = .2))+
  geom_segment(data = subset(models_diff, outcome == "Self"),
               aes(x = lower90, xend = higher90, y = sample, yend = sample),
               size = 4, alpha = 1,  position = position_nudge(y = -.2)) + 
  geom_segment(data = subset(models_diff, outcome == "Self"),
               aes(x = lower95, xend = higher95, y = sample, yend = sample),
               size = 4, alpha = .6,  position = position_nudge(y = -.2)) + 
  geom_segment(data = subset(models_diff, outcome == "Self"),
               aes(x = lower99, xend = higher99, y = sample, yend = sample),
               size = 4, alpha = .3, position = position_nudge(y = -.2)) + 
  geom_point(data = subset(models_diff, outcome == "Self"),
             aes(x = estimate, y = sample), fill = "white", color = "grey67", size = 2.5, pch = 21,  
             position = position_nudge(y = -.2))+
  geom_text(data = subset(models_diff, outcome == "Self"),aes(label=sprintf("%.2f", 
                                                                            round(estimate, digits = 2))), nudge_y = -.33, size=2.5)+
  geom_text(data = subset(models_diff, outcome == "Peers"),aes(label=sprintf("%.2f", 
                                                                             round(estimate, digits = 2))), nudge_y = .33, size=2.5)+
  theme_ggdist()+
  labs(x="",
       subtitle="Coefficients: ATE on self-reported propensity to date & recommend a friend to date")+
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        axis.text.x=element_text(color="black"),
        axis.text.y = element_text(color="black", face="bold"))


cmen<- lm_robust(profile_gencongruent ~ treat_profile, data = df3_men)
tidy_cmen <- tidy(cmen)
tidy_cmen$sample <- "Men"

cwomen<- lm_robust(profile_gencongruent ~ treat_profile, data = df3_women)
tidy_cwomen <- tidy(cwomen)
tidy_cwomen$sample <- "Women"
cdiff<- lm_robust(profile_gencongruent ~ treat_profile*gender, data = df3_het)

cmodels<- rbind(tidy_cmen, tidy_cwomen)
cmodels<-subset(cmodels, term %in% c("treat_profileBi")) 

cmodels$lower90 <- cmodels$estimate - (1.645 * cmodels$std.error)
cmodels$higher90 <- cmodels$estimate + (1.645 * cmodels$std.error)
cmodels$lower95 <- cmodels$estimate - (1.96 * cmodels$std.error)
cmodels$higher95 <- cmodels$estimate + (1.96 * cmodels$std.error)
cmodels$lower99 <- cmodels$estimate - (2.58 * cmodels$std.error)
cmodels$higher99 <- cmodels$estimate + (2.58 * cmodels$std.error)

cbp2 <- c( "#FC4C02", "purple")
mechplot<- ggplot(cmodels, aes(y = term, x=estimate, color=sample)) + 
  geom_vline(xintercept = 0, linetype="longdash", colour="black", alpha=.5)+
  scale_color_manual(values = cbp2, guide = "none")+
  xlim(-.13,.02)+
  geom_segment(data = subset(cmodels, sample == "Men"),
               aes(x = lower90, xend = higher90, y = term, yend = term),
               size = 4, alpha = 1,  position = position_nudge(y = .1)) + 
  geom_segment(data = subset(cmodels, sample == "Men"),
               aes(x = lower95, xend = higher95, y = term, yend = term),
               size = 4, alpha = .6,  position = position_nudge(y = .1)) + 
  geom_segment(data = subset(cmodels, sample == "Men"),
               aes(x = lower99, xend = higher99, y = term, yend = term),
               size = 4, alpha = .3, position = position_nudge(y = .1)) + 
  geom_point(data = subset(cmodels, sample == "Men"),
             aes(x = estimate, y = term), fill = "white", color = "grey67", size = 2.5, pch = 21,  
             position = position_nudge(y = .1))+
  geom_segment(data = subset(cmodels, sample == "Women"),
               aes(x = lower90, xend = higher90, y = term, yend = term),
               size = 4, alpha = 1,  position = position_nudge(y = -.1)) + 
  geom_segment(data = subset(cmodels, sample == "Women"),
               aes(x = lower95, xend = higher95, y = term, yend = term),
               size = 4, alpha = .6,  position = position_nudge(y = -.1)) + 
  geom_segment(data = subset(cmodels, sample == "Women"),
               aes(x = lower99, xend = higher99, y = term, yend = term),
               size = 4, alpha = .3, position = position_nudge(y = -.1)) + 
  geom_point(data = subset(cmodels, sample == "Women"),
             aes(x = estimate, y = term), fill = "white", color = "grey67", size = 2.5, pch = 21,  
             position = position_nudge(y = -.1))+
  geom_label(data = subset(cmodels, sample == "Women"),aes(label=sprintf("%.3f", 
                                                                         round(estimate, digits = 3))), nudge_y = -.175, size=2.5)+
  geom_label(data = subset(cmodels, sample == "Men"),aes(label=sprintf("%.3f", 
                                                                       round(estimate, digits = 3))), nudge_y = -.005, size=2.5)+
  theme_ggdist()+
  labs(x="",
       title="",
       subtitle = "Coefficients: CATE on gender incongruence",
       caption = "")+
  theme(legend.position = "none",
        axis.title.y = element_blank(),
        plot.subtitle = element_text(face="bold"),
        plot.title = element_text(face="bold"),
        axis.text.x=element_text(color="black"),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())+
  annotate("label", x = -.11, y = 1.35, label = "Men",
           color="#FC4C02", size=3, fontface="bold", hjust=.5)+
  annotate("label", x = -.11, y = 1.25, label = "Women",
           color="purple", size=3, fontface="bold", hjust=.5)+
  annotate("segment", x = -.025, xend = -.025, y = 1.11, yend = 1.3, color = "grey", linetype = "solid", alpha=.7)+
  annotate("segment", x = -.0862, xend = -.025, y = 1.3, yend = 1.3, color = "grey", linetype = "solid", alpha=.7)+
  annotate("segment", x = -.0862, xend = -.0862, y = .91, yend = 1.3, color = "grey", linetype = "solid", alpha=.7)+
  annotate("label", x = -.0525, y = 1.3, label = "-0.061 (p<0.01)", size = 2, fontface="bold",color = "black")

(mainplot + mechplot) +
  plot_annotation(
    title = "Study 3: Mechanism test from vignette experiment",
    caption = "Confidence intervals at 99%, 95% & 90% (N:1605)",
    theme = theme(
      plot.title = element_text(face = "bold", size = 14, margin = margin(b = 0)),
      plot.caption = element_text(margin = margin(t = 0))))

ggsave("figures/Figure10.png", height = 5, width = 7.5, dpi = 600)
ggsave("figures/tifs/Figure10.tif", height = 5, width = 7.5, dpi = 600)

###TABLE 2###
Table2 <- list()
Table2 [['Gender congruent']] <- lm_robust(profile_gencongruent ~ treat_profile*gender, df3_het)
Table2 [['Attractive']] <- lm_robust(profile_attractive ~ treat_profile*gender, df3_het)
Table2 [['Trustworthy']] <- lm_robust(profile_trustworthy ~ treat_profile*gender, df3_het)
Table2 [['Faithful']] <- lm_robust(profile_faithful ~ treat_profile*gender, df3_het)
Table2 [['Adventurous']] <- lm_robust(profile_adventurous ~ treat_profile*gender, df3_het)
msummary(Table2, star=TRUE, output = "tables/Table2.tex")

##FIGURE 9###
df_study3 <- df_study3 %>%
  filter(!is.na(gender))
df_study3 <- df_study3 %>%
  mutate(gender= as.factor(gender))

menmean <- ddply(df_study3, "gender", summarise, meanx=mean(reject_bimen, na.rm = TRUE))
womenmean <- ddply(df_study3, "gender", summarise, meanx=mean(reject_biwomen, na.rm = TRUE))
relativemean <- ddply(df_study3, "gender", summarise, meanx=mean(genderedpenalty, na.rm = TRUE))

hist1<- ggplot(df_study3, aes(x=reject_bimen, fill=gender, color=gender)) +
  geom_density(alpha=.4, adjust = 2) +
  theme_ggdist()+
  xlim(0,1)+
  scale_fill_manual(values = c( "#FC4C02", "purple")) +
  scale_color_manual(values = c( "#FC4C02", "purple"))+
  geom_vline(data=menmean, aes(xintercept=meanx,  colour=gender),
             linetype="dashed", size=1, alpha=.5)+
  labs(y="", x="Negatively (0) - Positively (1)", subtitle="How do you think a straight woman would view\nthe prospect of dating a bisexual man?")+
  theme(legend.position = "none",
        plot.subtitle = element_text(face="bold"))

hist2<- ggplot(df_study3, aes(x=reject_biwomen, fill=gender, color=gender)) +
  geom_density(alpha=.4, adjust = 2) +
  theme_ggdist()+
  xlim(0,1)+
  scale_fill_manual(values = c( "#FC4C02", "purple")) +
  scale_color_manual(values = c( "#FC4C02", "purple"))+
  geom_vline(data=womenmean, aes(xintercept=meanx,  colour=gender),
             linetype="dashed", size=1, alpha=.5)+
  labs(y="", x="Negatively (0) - Positively (1)", subtitle="How do you think a straight man would view\nthe prospect of dating a bisexual woman?")+
  theme(legend.position = "none",
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        plot.subtitle = element_text(face="bold"))

hist3<- ggplot(df_study3, aes(x=genderedpenalty, fill=gender, color=gender)) +
  geom_density(alpha=.4, adjust = 2) +
  theme_ggdist()+
  xlim(-1,1)+
  scale_fill_manual(values = c( "#FC4C02", "purple")) +
  scale_color_manual(values = c( "#FC4C02", "purple"))+
  geom_vline(data=relativemean, aes(xintercept=meanx,  colour=gender),
             linetype="dashed", size=1, alpha=.5)+
  labs(y="Density", x="Difference in gendered penalty", 
       subtitle="View of relative penalty/premium for men and women")+
  theme(legend.position = "none",
        plot.subtitle = element_text(face="bold"))+
  annotate("label", x = 0.5, y = .9, label = "Men",
           color="#FC4C02", size=4, fontface="bold", hjust=0)+
  annotate("label", x = 0.5, y = .75, label = "Women",
           color="purple", size=4, fontface="bold", hjust=0)


(hist1+hist2)/hist3+
  plot_annotation(caption = 'Dashed lines indicate mean value for men & women (N:1971)\nNegative relative penalty indicates stronger penalty against men',
                  title = 'Study 3: Empirical expectations on gendered penalty') & 
  theme(plot.title = element_text(face="bold"))
ggsave("figures/Figure9.png", width=8, dpi=600)
ggsave("figures/tifs/Figure9.tif", width=8, dpi=600)
###APPENDIX MATERIAL###

TableC1 <- list()
TableC1 [['Would Date I']] <- lm_robust(profile_woulddate ~ treat_profile, df3_het)
TableC1 [['Would Date II']] <- lm_robust(profile_woulddate ~ treat_profile*gender, df3_het)
TableC1 [['Would Reccommend I']] <- lm_robust(profile_recommend ~ treat_profile, df3_het)
TableC1 [['Would Reccommend II']] <- lm_robust(profile_recommend ~ treat_profile*gender, df3_het)
msummary(TableC1, star=TRUE, output = "tables/TableC1.tex")


TabelC2 <- list()
TabelC2 [['Gender congruent']] <- lm_robust(profile_gencongruent ~ treat_profile, df3_het)
TabelC2 [['Attractive']] <- lm_robust(profile_attractive ~ treat_profile, df3_het)
TabelC2 [['Trustworthy']] <- lm_robust(profile_trustworthy ~ treat_profile, df3_het)
TabelC2 [['Faithful']] <- lm_robust(profile_faithful ~ treat_profile, df3_het)
TabelC2 [['Adventurous']] <- lm_robust(profile_adventurous ~ treat_profile, df3_het)
msummary(TabelC2, star=TRUE, output = "tables/TabelC2.tex")

df3_nohet <- df_study3 %>% 
  filter(sexualorientation!="heterosexual")

TableC3 <- list()
TableC3 [['Would Date I']] <- lm_robust(profile_woulddate ~ treat_profile, df3_nohet)
TableC3 [['Would Date II']] <- lm_robust(profile_woulddate ~ treat_profile*gender, df3_nohet)
TableC3 [['Would Reccommend I']] <- lm_robust(profile_recommend ~ treat_profile, df3_nohet)
TableC3 [['Would Reccommend II']] <- lm_robust(profile_recommend ~ treat_profile*gender, df3_nohet)
msummary(TableC3, star=TRUE, output = "tables/TableC3.tex")


TableC4 <- list()
TableC4 [['Gender congruent']] <- lm_robust(profile_gencongruent ~ treat_profile*gender, df3_nohet)
TableC4 [['Attractive']] <- lm_robust(profile_attractive ~ treat_profile*gender, df3_nohet)
TableC4 [['Trustworthy']] <- lm_robust(profile_trustworthy ~ treat_profile*gender, df3_nohet)
TableC4 [['Faithful']] <- lm_robust(profile_faithful ~ treat_profile*gender, df3_nohet)
TableC4 [['Adventurous']] <- lm_robust(profile_adventurous ~ treat_profile*gender, df3_nohet)
msummary(TableC4, star=TRUE, output = "tables/TableC4.tex")



###Specification Curve###

detach("package:plyr", unload = TRUE)

df3_het <- df3_het %>%
  mutate(profileBI = factor(profileBI))

controls <- c("gender", "age", "degree", "ethnicity", "partyID", "partners", "rightwing", "attentionpass")
control_labels <- c(
  gender = "Gender",
  age = "Age",
  degree = "Education",
  ethnicity = "Ethnicity",
  partyID = "Partisan ID",
  partners = "N. of partners",
  rightwing = "Ideology",
  attentionpass = "Attention checks")


control_sets <- unlist(lapply(1:length(controls), function(i) {
  combn(controls, i, simplify = FALSE)
}), recursive = FALSE)

spec_results <- tibble(control_set = seq_along(control_sets)) %>%
  mutate(
    controls = map(control_set, ~ control_sets[[.x]]),
    formula_chr = map_chr(controls, ~ paste("profile_recommend ~", paste(c("profileBI", .x), collapse = " + "))),
    formula_obj = map(formula_chr, as.formula),
    model = map(formula_obj, ~ lm(.x, data = df3_het)),
    tidy = map(model, ~ broom::tidy(.x))
  ) %>%
  unnest(tidy) %>%
  filter(term == "profileBI2") %>%
  mutate(
    lower90 = estimate - 1.645 * std.error,
    upper90 = estimate + 1.645 * std.error,
    lower95 = estimate - 1.96 * std.error,
    upper95 = estimate + 1.96 * std.error,
    sig_05 = p.value < 0.05,
    model_id = row_number()
  ) %>%
  arrange(estimate) %>%
  mutate(model_id = row_number())

spec_results <- spec_results %>%
  arrange(estimate) %>%
  mutate(model_id = row_number())

min_estimate <- min(spec_results$estimate, na.rm = TRUE)
median_estimate <- median(spec_results$estimate, na.rm = TRUE)
max_estimate <- max(spec_results$estimate, na.rm = TRUE)


spec_matrix <- spec_results %>%
  select(model_id, controls) %>%
  mutate(
    control_flags = map(controls, ~ set_names(rep(1, length(.x)), .x))  
  ) %>%
  select(model_id, control_flags) %>%
  unnest_wider(control_flags, names_repair = "unique") %>%  
  pivot_longer(
    cols = -model_id,
    names_to = "control",
    values_to = "included"
  ) %>%
  mutate(
    included = factor(ifelse(is.na(included), 0, included))  )
min_estimate <- min(spec_results$estimate, na.rm = TRUE)
median_estimate <- median(spec_results$estimate, na.rm = TRUE)
max_estimate <- max(spec_results$estimate, na.rm = TRUE)


spec_matrix <- spec_results %>%
  select(model_id, controls) %>%
  mutate(
    control_flags = map(controls, ~ set_names(rep(1, length(.x)), .x))  
  ) %>%
  select(model_id, control_flags) %>%
  unnest_wider(control_flags, names_repair = "unique") %>%  
  pivot_longer(
    cols = -model_id,
    names_to = "control",
    values_to = "included"
  ) %>%
  mutate(
    included = factor(ifelse(is.na(included), 0, included))  )

p_curve <- ggplot(spec_results, aes(x = model_id, y = estimate)) +
  geom_hline(yintercept = median_estimate, linetype = "solid", linewidth=1, color = "#660033") +
  geom_linerange(aes(ymin = lower95, ymax = upper95), alpha = 0.5, linewidth = 1, color = "#E0115F") +
  geom_linerange(aes(ymin = lower90, ymax = upper90), alpha = 0.9, linewidth = 1, color = "#E0115F") +
  geom_point(size = 2, fill="white", color = "#E0115F", pch=21) +  
  geom_hline(yintercept = 0, linetype = "dashed", color = "#660033") +
  labs(subtitle="Effect on 'would encourage a friend to date'",
       title = "255 different specifications with a median ATE of -8.9 percentage-points",
       x = "",
       y = "Average treatment effect",
       shape = "p < 0.05") +
  annotate(geom="label", x = 50, y =(median_estimate+.01), size = 3, color = "#660033", fontface=2,
           label = "Median point-estimate: -0.089")+
  theme_ggdist()+
  theme(
    plot.title = element_text(size=14, face="bold"))

spec_matrix <- spec_matrix %>%
  mutate(control = recode(control, !!!control_labels))

p_controls <- ggplot(spec_matrix, aes(x = model_id, y = fct_rev(factor(control)), fill = included)) +
  geom_tile(color = "white", linewidth = 0.2) +
  scale_fill_manual(values = c("0" = "gray90", "1" = "#E0115F")) +
  labs(x = NULL, y = "Included Controls") +
  theme_ggdist()+
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_text(size=14, face="bold"),
    legend.position = "none")

p_curve / p_controls +
  plot_layout(heights = c(3, 1))+
  plot_annotation(title = 'Study 3: Multiverse analysis I',
                  caption = "Confidence intervals at 95% & 90%",
                  theme = theme(plot.title = element_text(face = "bold", size=18),
                                plot.subtitle = element_text(size=14)))
ggsave("figures/appendix/FigureC6.png", width=8.5)


spec_results <- tibble(control_set = seq_along(control_sets)) %>%
  mutate(
    controls = map(control_set, ~ control_sets[[.x]]),
    formula_chr = map_chr(controls, ~ paste("profile_woulddate ~", paste(c("profileBI", .x), collapse = " + "))),
    formula_obj = map(formula_chr, as.formula),
    model = map(formula_obj, ~ lm(.x, data = df3_het)),
    tidy = map(model, ~ broom::tidy(.x))
  ) %>%
  unnest(tidy) %>%
  filter(term == "profileBI2") %>%
  mutate(
    lower90 = estimate - 1.645 * std.error,
    upper90 = estimate + 1.645 * std.error,
    lower95 = estimate - 1.96 * std.error,
    upper95 = estimate + 1.96 * std.error,
    sig_05 = p.value < 0.05,
    model_id = row_number()
  ) %>%
  arrange(estimate) %>%
  mutate(model_id = row_number())

spec_results <- spec_results %>%
  arrange(estimate) %>%
  mutate(model_id = row_number())

min_estimate <- min(spec_results$estimate, na.rm = TRUE)
median_estimate <- median(spec_results$estimate, na.rm = TRUE)
max_estimate <- max(spec_results$estimate, na.rm = TRUE)


spec_matrix <- spec_results %>%
  select(model_id, controls) %>%
  mutate(
    control_flags = map(controls, ~ set_names(rep(1, length(.x)), .x))  
  ) %>%
  select(model_id, control_flags) %>%
  unnest_wider(control_flags, names_repair = "unique") %>%  
  pivot_longer(
    cols = -model_id,
    names_to = "control",
    values_to = "included"
  ) %>%
  mutate(
    included = factor(ifelse(is.na(included), 0, included))  )
min_estimate <- min(spec_results$estimate, na.rm = TRUE)
median_estimate <- median(spec_results$estimate, na.rm = TRUE)
max_estimate <- max(spec_results$estimate, na.rm = TRUE)


spec_matrix <- spec_results %>%
  select(model_id, controls) %>%
  mutate(
    control_flags = map(controls, ~ set_names(rep(1, length(.x)), .x))  
  ) %>%
  select(model_id, control_flags) %>%
  unnest_wider(control_flags, names_repair = "unique") %>%  
  pivot_longer(
    cols = -model_id,
    names_to = "control",
    values_to = "included"
  ) %>%
  mutate(
    included = factor(ifelse(is.na(included), 0, included))  )

p_curve <- ggplot(spec_results, aes(x = model_id, y = estimate)) +
  geom_hline(yintercept = median_estimate, linetype = "solid", linewidth=1, color = "#660033") +
  geom_linerange(aes(ymin = lower95, ymax = upper95), alpha = 0.5, linewidth = 1, color = "#E0115F") +
  geom_linerange(aes(ymin = lower90, ymax = upper90), alpha = 0.9, linewidth = 1, color = "#E0115F") +
  geom_point(size = 2, fill="white", color = "#E0115F", pch=21) +  
  geom_hline(yintercept = 0, linetype = "dashed", color = "#660033") +
  labs(subtitle="Effect on 'would personally date'",
       title = "255 different specifications with a median ATE of -11 percentage-points",
       x = "",
       y = "Average treatment effect",
       shape = "p < 0.05") +
  annotate(geom="label", x = 50, y =(median_estimate+.01), size = 3, color = "#660033", fontface=2,
           label = "Median point-estimate: -0.105")+
  theme_ggdist()+
  theme(
    plot.title = element_text(size=14, face="bold"))

spec_matrix <- spec_matrix %>%
  mutate(control = recode(control, !!!control_labels))

p_controls <- ggplot(spec_matrix, aes(x = model_id, y = fct_rev(factor(control)), fill = included)) +
  geom_tile(color = "white", linewidth = 0.2) +
  scale_fill_manual(values = c("0" = "gray90", "1" = "#E0115F")) +
  labs(x = NULL, y = "Included Controls") +
  theme_ggdist()+
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_text(size=14, face="bold"),
    legend.position = "none")

p_curve / p_controls +
  plot_layout(heights = c(3, 1))+
  plot_annotation(title = 'Study 3: Multiverse analysis II',
                  caption = "Confidence intervals at 95% & 90%",
                  theme = theme(plot.title = element_text(face = "bold", size=18),
                                plot.subtitle = element_text(size=14)))
ggsave("figures/appendix/FigureC7.png", width=8.5)



###CRF###

df3_men<- df3_men  %>% 
  mutate(treat_num=as.numeric(treat_profile),
         age=as.numeric(age),
         degreenum=as.numeric(degree),
         rightnum=as.numeric(rightwing),
         attentionnum=as.numeric(attentionpass),
         nonwhitenum=as.numeric(nonwhite),
         outcome1=as.numeric(profile_recommend),
         outcome2=as.numeric(profile_woulddate))


df3_women<- df3_women  %>% 
  mutate(treat_num=as.numeric(treat_profile),
         age=as.numeric(age),
         degreenum=as.numeric(degree),
         rightnum=as.numeric(rightwing),
         attentionnum=as.numeric(attentionpass),
         nonwhitenum=as.numeric(nonwhite),
         outcome1=as.numeric(profile_recommend),
         outcome2=as.numeric(profile_woulddate))
covs <- c("age", "degreenum", "rightnum", "attentionnum", "nonwhitenum")

index <- with(df3_men, complete.cases(outcome1, treat_num))
X <- df3_men[index, c(covs)]

X <- as.matrix(X)
Y_Obs <- df3_men$outcome1[index]
W <- df3_men$treat_num[index]
Z <- df3_men$treat_num[index]

set.seed(123)
tau_forest <- instrumental_forest(Y = Y_Obs, X = X, W = W, Z = Z, 
                                  honesty = TRUE, num.trees = 4000, seed = 123)

tau_hat_forest <- predict(tau_forest, estimate.variance = TRUE)

sigma_hat <- sqrt(tau_hat_forest$variance.estimates)

grf_df <- 
  cbind(data.frame(
    tau_hat = tau_hat_forest$predictions,
    ci_upper = tau_hat_forest$predictions + 1.96 * sigma_hat,
    ci_lower = tau_hat_forest$predictions - 1.96 * sigma_hat),
    X
  )


grf_df$order <- with(grf_df, rank(tau_hat, ties.method = "first"))


grf_plot1 <- 
  grf_df %>% 
  ggplot(., aes(x = tau_hat, y = order)) +
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper),
                 alpha = 0.15, height = 0, size = 0.5, color="steelblue") + 
  geom_point(size = 1, pch = 20, color="navyblue") +
  labs(subtitle = "Would reccommend a friend to date",
       x = "", 
       y = "")+
  theme_ggdist()+
  xlim(-.4,.3)+
  theme(strip.background = element_blank(),
        strip.text = element_text(color="black", size=10, face="bold", hjust=.5),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())


index <- with(df3_men, complete.cases(outcome2, treat_num))
X <- df3_men[index, c(covs)]

X <- as.matrix(X)
Y_Obs <- df3_men$outcome2[index]
W <- df3_men$treat_num[index]
Z <- df3_men$treat_num[index]

set.seed(123)
tau_forest <- instrumental_forest(Y = Y_Obs, X = X, W = W, Z = Z, 
                                  honesty = TRUE, num.trees = 4000, seed = 123)

tau_hat_forest <- predict(tau_forest, estimate.variance = TRUE)

sigma_hat <- sqrt(tau_hat_forest$variance.estimates)

grf_df <- 
  cbind(data.frame(
    tau_hat = tau_hat_forest$predictions,
    ci_upper = tau_hat_forest$predictions + 1.96 * sigma_hat,
    ci_lower = tau_hat_forest$predictions - 1.96 * sigma_hat),
    X
  )


grf_df$order <- with(grf_df, rank(tau_hat, ties.method = "first"))


grf_plot2 <- 
  grf_df %>% 
  ggplot(., aes(x = tau_hat, y = order)) +
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper),
                 alpha = 0.15, height = 0, size = 0.5, color="#B12A90FF") + 
  geom_point(size = 1, pch = 20, color="navyblue") +
  labs(subtitle = "I would date",
       x = "Effects among men", 
       y = "")+
  theme_ggdist()+
  xlim(-.4,.3)+
  theme(strip.background = element_blank(),
        strip.text = element_text(color="black", size=10, face="bold", hjust=.5),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.x = element_text(face="bold"))


index <- with(df3_women, complete.cases(outcome1, treat_num))
X <- df3_women[index, c(covs)]

X <- as.matrix(X)
Y_Obs <- df3_women$outcome1[index]
W <- df3_women$treat_num[index]
Z <- df3_women$treat_num[index]

set.seed(123)
tau_forest <- instrumental_forest(Y = Y_Obs, X = X, W = W, Z = Z, 
                                  honesty = TRUE, num.trees = 4000, seed = 123)

tau_hat_forest <- predict(tau_forest, estimate.variance = TRUE)

sigma_hat <- sqrt(tau_hat_forest$variance.estimates)

grf_df <- 
  cbind(data.frame(
    tau_hat = tau_hat_forest$predictions,
    ci_upper = tau_hat_forest$predictions + 1.96 * sigma_hat,
    ci_lower = tau_hat_forest$predictions - 1.96 * sigma_hat),
    X
  )


grf_df$order <- with(grf_df, rank(tau_hat, ties.method = "first"))


grf_plot3 <- 
  grf_df %>% 
  ggplot(., aes(x = tau_hat, y = order)) +
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper),
                 alpha = 0.15, height = 0, size = 0.5, color="steelblue") + 
  geom_point(size = 1, pch = 20, color="navyblue") +
  labs(subtitle = "Would reccommend a friend to date",
       x = "", 
       y = "")+
  theme_ggdist()+
  xlim(-.4,.3)+
  theme(strip.background = element_blank(),
        strip.text = element_text(color="black", size=10, face="bold", hjust=.5),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())



index <- with(df3_women, complete.cases(outcome2, treat_num))
X <- df3_women[index, c(covs)]

X <- as.matrix(X)
Y_Obs <- df3_women$outcome2[index]
W <- df3_women$treat_num[index]
Z <- df3_women$treat_num[index]

set.seed(123)
tau_forest <- instrumental_forest(Y = Y_Obs, X = X, W = W, Z = Z, 
                                  honesty = TRUE, num.trees = 4000, seed = 123)

tau_hat_forest <- predict(tau_forest, estimate.variance = TRUE)

sigma_hat <- sqrt(tau_hat_forest$variance.estimates)

grf_df <- 
  cbind(data.frame(
    tau_hat = tau_hat_forest$predictions,
    ci_upper = tau_hat_forest$predictions + 1.96 * sigma_hat,
    ci_lower = tau_hat_forest$predictions - 1.96 * sigma_hat),
    X
  )


grf_df$order <- with(grf_df, rank(tau_hat, ties.method = "first"))


grf_plot4 <- 
  grf_df %>% 
  ggplot(., aes(x = tau_hat, y = order)) +
  geom_vline(xintercept = 0, linetype="longdash", colour="black")+
  scale_color_manual(values = colors, guide = "none")+
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper),
                 alpha = 0.15, height = 0, size = 0.5, color="#B12A90FF") + 
  geom_point(size = 1, pch = 20, color="navyblue") +
  labs(subtitle = "I would date",
       x = "Effects among women", 
       y = "")+
  theme_ggdist()+
  xlim(-.4,.3)+
  theme(strip.background = element_blank(),
        strip.text = element_text(color="black", size=10, face="bold", hjust=.5),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.x = element_text(face="bold"))


(grf_plot1+grf_plot3)/(grf_plot2+grf_plot4)+
  plot_annotation(caption = 'Red reference line indicates primary effect among full sample',
                  title = 'Study 3 - Causal random forest') & 
  theme(plot.title = element_text(face="bold"))
ggsave("FigureC5.png",
       path = here("figures/appendix"),dpi=800)



